############### ###############
## 10 - analysis - covariates' selection
## Project: CBO
## Author: Kamil Kouhen
## Purpose: Choosing covariates to be used for estimations
## Date of creation: 03/06/2022
############### ###############

# 1. Making list of all possible covariates at the CBO level --------------- ###### 
  
  ## Starting with main_addon which contains baseline and monitoring variables ##

  # Candidate CBO-level covariates: keep only the rows of main_addon where
  # Municipality_included_MON == 1, then keep the location/identifier columns
  # and the handpicked baseline (_BL) variables, in the order listed.
  listcov_cbolevel <- main_addon %>%
    filter(Municipality_included_MON == 1) %>%
    select(all_of(c(
      # Location and identifier columns
      "region", "regionname", "commune", "communename", "appcode",

      # Possible covariates: CBO level
      "total_score_quanti_BL", "total_score_quali_BL", "rank_BL",
      "meet_municipality_BL", "status_political_BL", "status_religious_BL",
      "score_activemember_BL", "score_budget_amount_BL",
      "score_years_in_exist_BL", "score_village_active_BL",
      "score_ethnic_number_BL", "score_mobilize_people_BL",
      "score_meet_municipality_BL", "motivation_BL", "competences_BL",
      "experience_anterieure_BL", "capacite_mobilisation_BL"
    )))

  # Checking whether they vary enough: one frequency table per column
  sapply(X = listcov_cbolevel, FUN = table)
  
    ### Removing irrelevant variables and those with hidden missing values in factor variable
  # Drop rank_BL in place (%<>% assigns the result back to listcov_cbolevel).
  # The pipeline must end at select(): a trailing pipe here would feed the data
  # frame into the write_xlsx() call below as an extra first argument and
  # overwrite listcov_cbolevel with that call's result.
  listcov_cbolevel %<>%
    select(-rank_BL) ## Exists under other form

  ## Exporting summary statistics ##
  # The row names of the sumstats() output are bound in as a first column so
  # that they are part of the exported table.
  write_xlsx(cbind(rownames(sumstats(listcov_cbolevel)), sumstats(listcov_cbolevel)),
             here("Output", "For Covariate Selection", "Summary statistics - cbo level.xlsx"))
  
# 2. Making list of all possible covariates at the commune level --------------- ###### 
  # Candidate commune-level covariates: baseline (_BL) variables from
  # main_addon, joined by commune with 2018 SUPERMUN indicators, commune type
  # and population size, and 2018 conflict fatalities.
  listcov_communelevel <- main_addon %>%
    # Keep the first row found for each commune, with all of its columns.
    # NOTE(review): this runs before the filter below, so it assumes
    # Municipality_included_MON is constant within a commune -- confirm.
    distinct(commune, .keep_all = TRUE) %>%
    filter(Municipality_included_MON == 1) %>%
    select(region, commune,
           ends_with("_BL")) %>%

    ## Removing variables at the CBO level ##
    # Positional range: drops every column from cboname_BL through communetype_BL
    select(-(cboname_BL:communetype_BL)) %>%

    ## Starting with SUPERMUN indicators in 2018 ##
    left_join(SUPERMUN_adjusted %>%
                filter(year == 2018) %>%
                # Suffix every column outside regionname:year with "2018_S"
                rename_at(vars(-(regionname:year)), ~ paste0(., "2018_S")) %>%
                select(commune, contains("2018")),
              by = "commune") %>%
    select(-starts_with("value_personnel")) %>% ## Removing some that seem unnecessary

    ## Adding commune type and population size ##
    left_join(Population_stats_adjusted %>%
                mutate(Rural_commune = ifelse(rural == 1, 1, 0)) %>%
                set_variable_labels(Rural_commune = "1 if municipality is rural.") %>%
                rename("Popsize_2018" = compop2018) %>%
                set_variable_labels(Popsize_2018 = "Population size of municipality in 2018.") %>%
                select(commune, Rural_commune, Popsize_2018),
              by = "commune") %>%

    ## Adding number of fatalities from conflicts in 2018 ##
    # NOTE(review): assumes ACLED_adjusted holds at most one row per commune
    # and year; otherwise this join duplicates commune rows -- confirm.
    left_join(ACLED_adjusted %>%
                filter(year == 2018) %>%
                select(commune, fatalities) %>%
                rename("fatalities_2018" = fatalities) %>%
                set_variable_labels(fatalities_2018 = "Number of fatalities from conflicts in 2018 in municipality."),
              by = "commune")

    ## Checking if they vary enough ##
    # Top-level call: displays the summary table when run interactively
    sumstats(listcov_communelevel)

  ## Exporting summary statistics ##
  # Row names of the summary table are bound in as a first column before export
  write_xlsx(
    x = cbind(rownames(sumstats(listcov_communelevel)), sumstats(listcov_communelevel)),
    path = here("Output", "For Covariate Selection", "Summary statistics - commune level.xlsx")
  )
  
# 3. Exporting regression results: hypothesis 1 --------------- ###### 
  
  ## A_Breadth_of_engagement: Breadth of CBO engagement in muni. governance ##

  ### Handpicking covariates that make sense theoretically at the CBO level
  Possible.cov <- c("activemember_BL", "village_active_BL", "mobilize_people_BL",
                    "capacite_mobilisation_BL", "ethnic_number_BL")

  # OLS of the outcome on every other selected column (`~ .`): the handpicked
  # covariates plus region and situation.
  # na.action is passed by name: an unnamed `na.action(na.omit)` would be
  # matched positionally to lm()'s `subset` argument instead.
  est <- lm(A_Breadth_of_engagement ~ .,
            data = (CBO_groups_final %>% select(A_Breadth_of_engagement, all_of(Possible.cov), region, situation)),
            na.action = na.omit)
  # Coefficient table using the HC0 heteroskedasticity-consistent covariance
  coeftest(est,
           vcov = vcovHC(est,
                         type = "HC0"))

  ### Handpicking covariates that make sense theoretically at the commune level
  Possible.cov <- c("rural_commune_BL", "popsize_2018_BL", "fatalities_2018_BL",
                    "score_meetings12018_S", "score_attendance2018_S")

  # Same specification with the commune-level covariates
  est <- lm(A_Breadth_of_engagement ~ .,
            data = (CBO_groups_final %>% select(A_Breadth_of_engagement, all_of(Possible.cov), region, situation)),
            na.action = na.omit)
  coeftest(est,
           vcov = vcovHC(est,
                         type = "HC0"))
  
  ## A_Intensity_of_engagement: Intensity of CBO engagement in muni. governance ##

  ### Handpicking covariates that make sense theoretically at the CBO level
  Possible.cov <- c("activemember_BL", "village_active_BL", "mobilize_people_BL",
                    "capacite_mobilisation_BL", "ethnic_number_BL")

  # OLS of the outcome on every other selected column (`~ .`): the handpicked
  # covariates plus region and situation.
  # na.action is passed by name: an unnamed `na.action(na.omit)` would be
  # matched positionally to lm()'s `subset` argument instead.
  est <- lm(A_Intensity_of_engagement ~ .,
            data = (CBO_groups_final %>% select(A_Intensity_of_engagement, all_of(Possible.cov), region, situation)),
            na.action = na.omit)
  # Coefficient table using the HC0 heteroskedasticity-consistent covariance
  coeftest(est,
           vcov = vcovHC(est,
                         type = "HC0"))

  ### Handpicking covariates that make sense theoretically at the commune level
  Possible.cov <- c("rural_commune_BL", "popsize_2018_BL", "fatalities_2018_BL",
                    "score_meetings12018_S", "score_attendance2018_S")

  # Same specification with the commune-level covariates
  est <- lm(A_Intensity_of_engagement ~ .,
            data = (CBO_groups_final %>% select(A_Intensity_of_engagement, all_of(Possible.cov), region, situation)),
            na.action = na.omit)
  coeftest(est,
           vcov = vcovHC(est,
                         type = "HC0"))
  
  ## A_DM_know_and_inter: Breadth of DMs' knowledge and interaction with CBOs ##

  ### Handpicking covariates that make sense theoretically at the DM/CBO level

  # Weighted OLS (weights = DM_weight) on DM_finalx2.
  # french and literate go through as.character() then as.numeric() so they
  # enter as numeric regressors (presumably they are stored as factors --
  # confirm).
  # na.action is passed by name: an unnamed `na.action(na.omit)` would be
  # matched positionally to lm()'s `subset` argument instead.
  est <- lm(A_DM_know_and_inter ~ factor(liveincommune) + factor(attended_school) +
              years_in_position + as.numeric(as.character(french)) + as.numeric(as.character(literate)) +
              factor(region) + factor(situation),
            data = (DM_finalx2),
            weights = DM_weight,
            na.action = na.omit)

  coeftest(est,
           vcov = vcovCL(est,
                         # Clustering at CBO level
                         cluster = factor(DM_finalx2$appcode), # CBO id
                         type = "HC0"))

  ### Handpicking covariates that make sense theoretically at the commune level
  # Kept for reference only: the formula below spells the same covariates out
  # explicitly (rural_commune_BL is wrapped in factor()) and does not read
  # Possible.cov.
  Possible.cov <- c("rural_commune_BL", "popsize_2018_BL", "fatalities_2018_BL",
                    "score_meetings12018_S", "score_attendance2018_S")

  est <- lm(A_DM_know_and_inter ~ factor(rural_commune_BL) + popsize_2018_BL +
              fatalities_2018_BL + score_meetings12018_S + score_attendance2018_S,
            data = (DM_finalx2),
            weights = DM_weight,
            na.action = na.omit)

  coeftest(est,
           vcov = vcovCL(est,
                         # Clustering at CBO level
                         cluster = factor(DM_finalx2$appcode), # CBO id
                         type = "HC0"))
  
# 4. Exporting regression results: hypothesis 2 --------------- ###### 
  
# 5. Exporting regression results: hypothesis 3 --------------- ###### 
  
  ## A_bin_decideurs_important: Community groups and civil society organizations are among the three
   # most important actors to keep happy in order to maintain influence ##

  ### Handpicking covariates that make sense theoretically at the DM level

  # Weighted OLS (weights = DM_weight) on DM_final.
  # NOTE(review): popsize_2018_BL_ctrl differs from the popsize_2018_BL name
  # used in the DM_finalx2 regressions -- confirm this is the intended column.
  # na.action is passed by name: an unnamed `na.action(na.omit)` would be
  # matched positionally to lm()'s `subset` argument instead.
  est <- lm(A_bin_decideurs_important ~
              popsize_2018_BL_ctrl +
              fatalities_2018_BL +
              score_meetings12018_S +
              score_attendance2018_S +
              factor(respondent_type) +
              factor(region) +
              factor(situation),
            data = DM_final,
            weights = DM_weight,
            na.action = na.omit)

  # `seed` is not defined in this section; it must already exist when this runs
  set.seed(seed) # Setting seed before every bootstrap estimation
  coeftest(est,
           vcov = vcovBS(est,
                         cluster = factor(DM_final$commune), # Municipality id
                         R = 250)) # 250 replicates
  
 
  
  
  